# Métodos
Se crearon múltiples sesiones de programación en R y Python con “Anaconda”. Se utilizaron diversos paquetes enfocados en la ciencia de datos, como “ggplot2”, “caret”, “Keras” y “TensorFlow”.
# Load the raw transaction export into `df` and print per-column summaries.
# stringsAsFactors = TRUE is set explicitly: the output recorded below shows
# CTR_PLAZO as a factor, which is only the default behaviour before R 4.0.
df <- read.csv("FD-04_minero_21062019.csv", stringsAsFactors = TRUE)
summary(df)
## UID_ENTRADA_CAMION UID_PRODUCTOR ENT_FECHA_ENTRADA
## Min. : 1 Min. : 101.0 Min. :2.008e+13
## 1st Qu.:105363 1st Qu.: 254.0 1st Qu.:2.011e+13
## Median :205492 Median : 463.0 Median :2.014e+13
## Mean :205094 Mean : 553.2 Mean :2.013e+13
## 3rd Qu.:307667 3rd Qu.: 868.0 3rd Qu.:2.015e+13
## Max. :400507 Max. :1223.0 Max. :2.019e+13
##
## ENT_FECHA_SALIDA ENT_PESO_NETO UID_MINA UID_PROVEEDOR
## Min. :2.008e+13 Min. : 0 Min. : 101.0 Min. :100.0
## 1st Qu.:2.011e+13 1st Qu.:14190 1st Qu.: 246.0 1st Qu.:125.0
## Median :2.014e+13 Median :20470 Median : 410.0 Median :163.0
## Mean :2.013e+13 Mean :21675 Mean : 508.5 Mean :209.8
## 3rd Qu.:2.015e+13 3rd Qu.:24610 3rd Qu.: 829.0 3rd Qu.:303.0
## Max. :2.019e+13 Max. :52130 Max. :1127.0 Max. :437.0
## NA's :123997
## CTR_PLAZO DLI_HUMEDAD DLI_CENIZAS
## 92 MESES :103567 Min. : 0.000 Min. : 0.00
## 72 : 44727 1st Qu.: 4.760 1st Qu.:12.82
## 44 MESES : 25732 Median : 6.090 Median :14.67
## 49 MESES : 23522 Mean : 6.651 Mean :14.93
## 78 : 18403 3rd Qu.: 8.100 3rd Qu.:16.74
## 11.5 MESES: 16472 Max. :99.000 Max. :99.00
## (Other) :121015
## DLI_CENIZASMASHUMEDAD DLI_KILOCALORIAS DLI_PESO_A_PAGAR DLI_MEGACALORIAS
## Min. : 0.00 Min. : 0 Min. : 0.00 Min. : 0
## 1st Qu.: 18.51 1st Qu.:5812 1st Qu.:13.90 1st Qu.: 82697
## Median : 21.24 Median :6168 Median :20.31 Median :124376
## Mean : 21.59 Mean :6036 Mean :21.21 Mean :130793
## 3rd Qu.: 24.35 3rd Qu.:6440 3rd Qu.:24.39 3rd Qu.:154120
## Max. :198.00 Max. :7638 Max. :52.19 Max. :360601
##
## DLI_VALOR_TOTAL DLI_REGALIAS DLI_PRECIO_BASE UID_LOCALIDAD_1
## Min. : 0 Min. : 0 Min. : 65450 Min. : 315.0
## 1st Qu.:1276038 1st Qu.: 54494 1st Qu.: 86990 1st Qu.: 377.0
## Median :1927438 Median : 79864 Median :100680 Median : 377.0
## Mean :2076032 Mean : 87357 Mean : 99380 Mean : 451.3
## 3rd Qu.:2590543 3rd Qu.:108128 3rd Qu.:102944 3rd Qu.: 409.0
## Max. :8708121 Max. :388185 Max. :200000 Max. :1240.0
##
# Compact overview of df: dimensions plus the type and first values of each column.
str(df)
## 'data.frame': 353438 obs. of 18 variables:
## $ UID_ENTRADA_CAMION : int 1 2 3 4 5 6 7 8 9 10 ...
## $ UID_PRODUCTOR : int 107 316 316 316 316 316 316 316 269 316 ...
## $ ENT_FECHA_ENTRADA : num 2.01e+13 2.01e+13 2.01e+13 2.01e+13 2.01e+13 ...
## $ ENT_FECHA_SALIDA : num 2.01e+13 2.01e+13 2.01e+13 2.01e+13 2.01e+13 ...
## $ ENT_PESO_NETO : int 15240 16480 17380 16770 16860 16360 14980 14440 11080 15320 ...
## $ UID_MINA : int 107 316 316 316 316 316 316 316 269 316 ...
## $ UID_PROVEEDOR : int 125 110 110 110 110 110 110 110 138 110 ...
## $ CTR_PLAZO : Factor w/ 101 levels "1","1 MES","1 MES Y 15 DÍAS",..: 10 78 78 78 78 78 78 78 10 78 ...
## $ DLI_HUMEDAD : num 8.24 5.6 5.6 5.6 5.6 5.6 5.6 5.6 6.12 5.6 ...
## $ DLI_CENIZAS : num 8.89 17.2 17.2 17.2 17.2 17.2 17.2 17.2 9.87 17.2 ...
## $ DLI_CENIZASMASHUMEDAD: num 17.1 22.8 22.8 22.8 22.8 ...
## $ DLI_KILOCALORIAS : num 6341 6436 6436 6436 6436 ...
## $ DLI_PESO_A_PAGAR : num 15.4 16.4 17.3 16.7 16.8 ...
## $ DLI_MEGACALORIAS : num 97492 105641 111410 107500 108077 ...
## $ DLI_VALOR_TOTAL : num 1083250 1173789 1237892 1194444 1200855 ...
## $ DLI_REGALIAS : num 48036 51945 54782 52859 53143 ...
## $ DLI_PRECIO_BASE : int 70000 70000 70000 70000 70000 70000 70000 70000 70000 70000 ...
## $ UID_LOCALIDAD_1 : int 377 430 430 430 430 430 430 430 369 430 ...
library(ggplot2)
# Scatter of DLI_VALOR_TOTAL against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = DLI_VALOR_TOTAL))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Valor Total ($ COL)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter of DLI_PRECIO_BASE against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = DLI_PRECIO_BASE))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Precio base ($ COL)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter of DLI_KILOCALORIAS against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = DLI_KILOCALORIAS))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("DLI_Kilocalorías(kcal)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter of DLI_REGALIAS against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = DLI_REGALIAS))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Regalías ($ COL)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter of ENT_PESO_NETO against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = ENT_PESO_NETO))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Peso neto (kg)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter of DLI_PESO_A_PAGAR against row position in df, with a smoothed trend.
# The column is referenced by bare name so ggplot2 resolves it inside `data`
# (using df$... within aes() is discouraged), and seq_len() stays valid even
# if df has zero rows. `size` is ggplot2's own name for the `cex` alias.
g <- ggplot(data = df, aes(x = seq_len(nrow(df)), y = DLI_PESO_A_PAGAR))
# Axis unit corrected from kg to t: in this data DLI_PESO_A_PAGAR is roughly
# ENT_PESO_NETO / 1000 (e.g. 15240 vs 15.4), and net weight is plotted in kg.
# NOTE(review): confirm the unit against the data dictionary.
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Peso a pagar (t)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Plot ash content against row position, with values above 40 clamped to 40.
# The summary of df shows DLI_CENIZAS reaching 99; presumably the cap keeps
# those extremes from flattening the rest of the plot.
cenizas <- pmin(df$DLI_CENIZAS, 40)
df_g <- data.frame(x = seq_along(cenizas), cenizas = cenizas)
# Map the plotting frame's columns by name instead of df_g[, i], so the
# aesthetics are looked up inside `data`.
g <- ggplot(data = df_g, aes(x = x, y = cenizas))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Cenizas (%)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Plot moisture against row position, with values above 20 clamped to 20.
# The summary of df shows DLI_HUMEDAD reaching 99; presumably the cap keeps
# those extremes from flattening the rest of the plot.
# `humedad` ends up as a data frame with columns x (row index) and y (capped
# moisture), built in one step rather than reusing the name for a vector first.
humedad <- data.frame(
  x = seq_len(nrow(df)),
  y = pmin(df$DLI_HUMEDAD, 20)
)
# Map columns by name instead of humedad[, i], so the aesthetics are looked up
# inside `data`.
g <- ggplot(data = humedad, aes(x = x, y = y))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Humedad (%)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Plot the combined ash + moisture column against row position, with values
# above 50 clamped to 50. The summary of df shows DLI_CENIZASMASHUMEDAD
# reaching 198; presumably the cap keeps those extremes from flattening the plot.
cenizas_humedad <- pmin(df$DLI_CENIZASMASHUMEDAD, 50)
# The second column keeps its original name `cenizas` for compatibility, even
# though here it holds the capped ash + moisture values.
df_g <- data.frame(x = seq_along(cenizas_humedad), cenizas = cenizas_humedad)
# Map the plotting frame's columns by name instead of df_g[, i], so the
# aesthetics are looked up inside `data`.
g <- ggplot(data = df_g, aes(x = x, y = cenizas))
g +
  geom_point(size = 0.005) +
  geom_smooth() +
  xlab("Número de la transacción (desde nov. 2008 a jun. 2019)") +
  ylab("Cenizas y humedad (%)")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'